import subprocess
import csv
import iocsv
import pymysql
import statistics

# Export per-period pull-request statistics from a GHTorrent MySQL database.
#
# For every entry returned by iocsv.read_csv_months("data.csv") the script
# runs a set of aggregate queries (merged / non-merged PR counts, median
# comment counts, median time-to-close, bot comment counts, median commits
# per PR) and appends one row to data/all.csv.
#
# All user-supplied values are passed to the driver as query parameters
# (``%s`` placeholders) instead of being spliced into the SQL text, so names
# containing quotes can neither break nor alter the statements.

# Sub-select shared by every "non-merged" query: the PR has no 'merged' event.
# (Kept inline in each statement below so every query reads on its own.)

SQL_MERGED_PRS = (
    "select count(prh.id) as num_merged_prs "
    "from projects p, users u, pull_requests pr, pull_request_history prh "
    "where p.id = pr.base_repo_id and pr.id = prh.pull_request_id "
    "and p.owner_id = u.id and p.name = %s and u.login = %s "
    "and CAST(prh.created_at AS DATE) between %s and %s "
    "and action = 'merged'"
)

SQL_NONMERGED_PRS = (
    "select count(prh.id) as num_nonmerged_prs "
    "from projects p, users u, pull_requests pr, pull_request_history prh "
    "where p.id = pr.base_repo_id and pr.id = prh.pull_request_id "
    "and p.owner_id = u.id and p.name = %s and u.login = %s "
    "and CAST(prh.created_at AS DATE) between %s and %s "
    "and action = 'closed' "
    "and 'merged' not in (select action from pull_request_history "
    "where pull_request_id = pr.id);"
)

SQL_COMMENTS_MERGED = (
    "select count(*) as num_comments_merged "
    "from projects p, issues i, issue_comments c, users u, users b, "
    "pull_requests pr, pull_request_history prh "
    "where i.id = c.issue_id and p.id = i.repo_id and i.pull_request = 1 "
    "and p.owner_id = u.id and p.name = %s and u.login = %s "
    "and CAST(i.created_at AS DATE) between %s and %s "
    "and pr.id = i.pull_request_id and pr.id = prh.pull_request_id "
    "and prh.action = 'merged' "
    "and b.id = c.user_id and b.login <> %s group by i.id;"
)

SQL_COMMENTS_NONMERGED = (
    "select count(*) as num_comments_nonmerged "
    "from projects p, issues i, issue_comments c, users u, users b, "
    "pull_requests pr, pull_request_history prh "
    "where i.id = c.issue_id and p.id = i.repo_id and i.pull_request = 1 "
    "and p.owner_id = u.id and p.name = %s and u.login = %s "
    "and CAST(i.created_at AS DATE) between %s and %s "
    "and pr.id = i.pull_request_id and pr.id = prh.pull_request_id "
    "and prh.action = 'closed' "
    "and 'merged' not in (select action from pull_request_history "
    "where pull_request_id = pr.id) "
    "and b.id = c.user_id and b.login <> %s group by i.id;"
)

SQL_BOT_COMMENTS_MERGED = (
    "select count(*) as num_comments_bot_merged "
    "from projects p, issues i, issue_comments c, users u, users b, "
    "pull_requests pr, pull_request_history prh "
    "where i.id = c.issue_id and p.id = i.repo_id and i.pull_request = 1 "
    "and p.owner_id = u.id and p.name = %s and u.login = %s "
    "and CAST(i.created_at AS DATE) between %s and %s "
    "and pr.id = i.pull_request_id and pr.id = prh.pull_request_id "
    "and prh.action = 'merged' "
    "and b.id = c.user_id and b.login = %s;"
)

SQL_BOT_COMMENTS_NONMERGED = (
    "select count(*) as num_comments_bot_unmerged "
    "from projects p, issues i, issue_comments c, users u, users b, "
    "pull_requests pr, pull_request_history prh "
    "where i.id = c.issue_id and p.id = i.repo_id and i.pull_request = 1 "
    "and p.owner_id = u.id and p.name = %s and u.login = %s "
    "and CAST(i.created_at AS DATE) between %s and %s "
    "and pr.id = i.pull_request_id and pr.id = prh.pull_request_id "
    "and prh.action = 'closed' "
    "and 'merged' not in (select action from pull_request_history "
    "where pull_request_id = pr.id) "
    "and b.id = c.user_id and b.login = %s;"
)

SQL_CLOSE_TIME_MERGED = (
    "select timestampdiff(minute, prh.created_at, min(prh2.created_at)) "
    "as minutes_merged "
    "from projects p, users u, pull_requests pr, "
    "pull_request_history prh, pull_request_history prh2 "
    "where p.id = pr.base_repo_id and pr.id = prh.pull_request_id "
    "and p.owner_id = u.id and p.name = %s and u.login = %s "
    "and CAST(prh.created_at AS DATE) between %s and %s "
    "and prh.action = 'opened' and prh2.action = 'merged' "
    "and prh.pull_request_id = prh2.pull_request_id group by pr.id;"
)

SQL_CLOSE_TIME_NONMERGED = (
    "select timestampdiff(minute, prh.created_at, min(prh2.created_at)) "
    "as minutes_nonmerged "
    "from projects p, users u, pull_requests pr, "
    "pull_request_history prh, pull_request_history prh2 "
    "where p.id = pr.base_repo_id and pr.id = prh.pull_request_id "
    "and p.owner_id = u.id and p.name = %s and u.login = %s "
    "and CAST(prh.created_at AS DATE) between %s and %s "
    "and prh.action = 'opened' and prh2.action = 'closed' "
    "and prh.pull_request_id = prh2.pull_request_id "
    "and 'merged' not in (select action from pull_request_history "
    "where pull_request_id = pr.id) group by pr.id;"
)

# NOTE(review): unlike the queries above, the two commit queries compare
# t.created_at directly (no CAST ... AS DATE) and read from `test_table`,
# whose definition is not visible here -- confirm this is intentional.
SQL_COMMITS_MERGED = (
    "select count(*) as num_commits_merged "
    "from test_table t, pull_request_commits c "
    "where t.name = %s and t.login = %s "
    "and t.created_at between %s and %s "
    "and action = 'merged' "
    "and c.pull_request_id = t.pull_request_id "
    "group by t.pull_request_id;"
)

SQL_COMMITS_NONMERGED = (
    "select count(*) as num_commits_nonmerged "
    "from test_table t, pull_request_commits c "
    "where t.name = %s and t.login = %s "
    "and t.created_at between %s and %s "
    "and action = 'closed' "
    "and 'merged' not in (select action from pull_request_history "
    "where pull_request_id = t.pull_request_id) "
    "and c.pull_request_id = t.pull_request_id "
    "group by t.pull_request_id;"
)

SQL_LANGUAGE = (
    "select language from projects p, users u "
    "where p.name = %s and u.login = %s and p.owner_id = u.id;"
)

SQL_AUTHORS = (
    "select count(distinct u2.id) as num_authors "
    "from projects p, users u1, users u2, pull_requests pr, "
    "pull_request_history prh "
    "where p.id = pr.base_repo_id and pr.id = prh.pull_request_id "
    "and p.owner_id = u1.id and p.name = %s and u1.login = %s "
    "and prh.created_at < '2019-01-01' and action =  'opened' "
    "and prh.actor_id = u2.id;"
)

SQL_TOTAL_COMMITS = (
    "select count(*) as num_commits "
    "from projects p, users u, commits c "
    "where project_id = p.id and p.name = %s and u.login = %s "
    "and u.id = p.owner_id;"
)


def _fetch_value(cursor, sql, params, column, default=0):
    """Execute *sql* with *params* and return *column* of the first row.

    Returns *default* when the query yields no row at all.
    """
    cursor.execute(sql, params)
    row = cursor.fetchone()
    return default if row is None else row[column]


def _fetch_median(cursor, sql, params, column):
    """Execute *sql* with *params* and return the median of *column*.

    Returns 0 when the query yields no rows (statistics.median would raise
    on an empty sequence).
    """
    # cursor.execute() returns the number of rows in the result set.
    if cursor.execute(sql, params) > 0:
        return statistics.median([row[column] for row in cursor.fetchall()])
    return 0


bd = pymysql.connect(host='localhost',
  user='XXX',
  password='XXX',
  db='GHTorrent_2019',
  charset='utf8mb4',
  cursorclass=pymysql.cursors.DictCursor)

try:
    cursor = bd.cursor()

    # newline='' is what the csv module expects for files it writes to;
    # the `with` block guarantees the file is closed even if a query fails.
    with open('data/all.csv', 'w', newline='') as data_out:
        w_data_out = csv.writer(data_out)

        months = iocsv.read_csv_months("data.csv")

        # Position of the current row inside its group of 24 rows.
        # NOTE(review): presumably each project contributes 24 consecutive
        # rows to `months` -- verify against iocsv.read_csv_months.
        i = 1

        # Project-level values, refreshed only on the first row of a group
        # and repeated on the remaining rows of that group.
        lang = None
        author = 0
        commits = 0

        for m in months:
            project = (m['repo'], m['owner'])
            period = project + (m['date_before'], m['date_after'])
            period_and_bot = period + (m['bot'],)

            # Number of merged / closed-without-merge PR events in the period.
            num_merged = _fetch_value(
                cursor, SQL_MERGED_PRS, period, 'num_merged_prs')
            num_non = _fetch_value(
                cursor, SQL_NONMERGED_PRS, period, 'num_nonmerged_prs')

            # Median number of comments per PR, excluding the bot's own.
            median_comments_merged = _fetch_median(
                cursor, SQL_COMMENTS_MERGED, period_and_bot,
                'num_comments_merged')
            median_comments_non = _fetch_median(
                cursor, SQL_COMMENTS_NONMERGED, period_and_bot,
                'num_comments_nonmerged')

            # Total number of comments written by the bot.
            comments_bot_merged = _fetch_value(
                cursor, SQL_BOT_COMMENTS_MERGED, period_and_bot,
                'num_comments_bot_merged')
            comments_bot_nonmerged = _fetch_value(
                cursor, SQL_BOT_COMMENTS_NONMERGED, period_and_bot,
                'num_comments_bot_unmerged')

            # Median minutes between the 'opened' event and the first
            # 'merged' / 'closed' event of a PR.
            median_close_time_merged = _fetch_median(
                cursor, SQL_CLOSE_TIME_MERGED, period, 'minutes_merged')
            median_close_time_non = _fetch_median(
                cursor, SQL_CLOSE_TIME_NONMERGED, period, 'minutes_nonmerged')

            # Median number of commits per PR.
            median_commits_merged = _fetch_median(
                cursor, SQL_COMMITS_MERGED, period, 'num_commits_merged')
            median_commits_non = _fetch_median(
                cursor, SQL_COMMITS_NONMERGED, period, 'num_commits_nonmerged')

            if i == 1:
                lang = _fetch_value(
                    cursor, SQL_LANGUAGE, project, 'language', default="")
                author = _fetch_value(
                    cursor, SQL_AUTHORS, project, 'num_authors')
                commits = _fetch_value(
                    cursor, SQL_TOTAL_COMMITS, project, 'num_commits')

            w_data_out.writerow([
                m['owner'], m['repo'], m['bot'],
                m['date_before'], m['date_after'],
                m['time'], m['intervention'], m['time_after_intervention'],
                num_merged, num_non,
                median_comments_merged, median_comments_non,
                median_close_time_merged, median_close_time_non,
                comments_bot_merged, comments_bot_nonmerged,
                median_commits_merged, median_commits_non,
                lang, author, commits,
            ])
            # Flush after every row so partial results survive a failure
            # in a later (slow) query.
            data_out.flush()

            i = 1 if i == 24 else i + 1
finally:
    # Release the database connection whether or not the export succeeded.
    bd.close()